import pandas as pd
import numpy as np
# Extract the relationship between genres and rating / vote counts.

# Display names written to the output index.  They are assigned positionally,
# so they are assumed to be in the same order as the genre flag columns of the
# input file (NOTE(review): confirm against preprocessed2.csv).
GENRE_LABELS = [
    "Action",
    "Adventure",
    "Animation",
    "Biography",
    "Comedy",
    "Crime",
    "Documentary",
    "Drama",
    "Family",
    "Fantasy",
    "Film-Noir",
    "History",
    "Horror",
    "Music",
    "Musical",
    "Mystery",
    "Reality-TV",
    "Romance",
    "Sci-Fi",
    "Short",
    "Sport",
    "Talk-Show",
    "Thriller",
    "War",
    "Western",
]


def _summarize_genres(frame):
    """Return the mean rating and mean vote count of the rows of each genre.

    Args:
        frame: DataFrame holding a contiguous run of columns from
            ``is_Action`` through ``num_votes`` that includes
            ``average_rating``.  Every column of that run except the last
            three is treated as a genre flag; a row belongs to a genre when
            its flag equals 1.

    Returns:
        DataFrame indexed by ``GENRE_LABELS`` (index name ``genres``) with
        the columns ``total_average_rating`` and ``average_votes``, both
        rounded to 4 decimals.  A genre with no rows yields NaN.

    Raises:
        ValueError: if the number of genre flag columns differs from
            ``len(GENRE_LABELS)``.
    """
    window = frame.loc[:, "is_Action":"num_votes"]
    # The run ends with three non-genre columns: one that this summary does
    # not use, followed by the two value columns read below.
    genre_columns = window.columns[:-3]
    ratings = window["average_rating"]
    votes = window["num_votes"]

    counts = {}
    rating_totals = {}
    vote_totals = {}
    for col in genre_columns:
        tagged = window[col] == 1
        counts[col] = tagged.sum()
        # Untagged rows contribute 0.  skipna=False keeps a missing value
        # among the tagged rows visible as NaN instead of silently dropping
        # it from the total while it is still included in the count.
        rating_totals[col] = ratings.where(tagged, 0).sum(skipna=False)
        vote_totals[col] = votes.where(tagged, 0).sum(skipna=False)

    counts = pd.Series(counts, dtype="float64")
    rating_totals = pd.Series(rating_totals, dtype="float64")
    vote_totals = pd.Series(vote_totals, dtype="float64")

    result = pd.DataFrame(
        {
            "total_average_rating": (rating_totals / counts).round(4),
            # Mean votes per title: vote total divided by the title count
            # (not the rating total).
            "average_votes": (vote_totals / counts).round(4),
        }
    )
    result.index = pd.Index(GENRE_LABELS, name="genres")
    return result


df = pd.read_csv(r"DSProject\csv\preprocessed2.csv")
genre_rating_votes = _summarize_genres(df)
genre_rating_votes.to_csv(r"DSProject\csv\genre_rating_votes.csv")